<!DOCTYPE html>
<html>
<head>
  <meta charset="utf-8">
  <!-- Document metadata for a single blog post: title, description, Open Graph
       tags, feed link, favicon and stylesheets. -->
  <title>&#39;热点抓取封装函数&#39; | Hexo</title>
  <!-- maximum-scale is intentionally not set so that users can still pinch-zoom. -->
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <meta name="description" content="import requestsimport redef get_html(url):    headers &#x3D; {        “User-Agent”: “Mozilla&#x2F;5.0 (Windows NT 10.0; Win64; x64) AppleWebKit&#x2F;537.36 (KHTML, like Gecko) Chrome&#x2F;81.0.4044.113 Safari&#x2F;537.36”">
  <meta property="og:type" content="article">
  <meta property="og:title" content="&#39;热点抓取封装函数&#39;">
  <!-- NOTE(review): yoursite.com looks like an unconfigured placeholder host; confirm the real site URL. -->
  <meta property="og:url" content="http://yoursite.com/2020/04/25/%E7%83%AD%E7%82%B9%E6%8A%93%E5%8F%96%E5%B0%81%E8%A3%85%E5%87%BD%E6%95%B0/index.html">
  <meta property="og:site_name" content="Hexo">
  <meta property="og:description" content="import requestsimport redef get_html(url):    headers &#x3D; {        “User-Agent”: “Mozilla&#x2F;5.0 (Windows NT 10.0; Win64; x64) AppleWebKit&#x2F;537.36 (KHTML, like Gecko) Chrome&#x2F;81.0.4044.113 Safari&#x2F;537.36”">
  <meta property="og:locale" content="en_US">
  <meta property="article:published_time" content="2020-04-25T03:03:44.000Z">
  <meta property="article:modified_time" content="2020-04-25T03:05:03.483Z">
  <meta property="article:author" content="John Doe">
  <meta name="twitter:card" content="summary">
  <link rel="alternate" href="/atom.xml" title="Hexo" type="application/atom+xml">
  <link rel="icon" href="/favicon.png">
  <!-- Web font is fetched over an explicit https URL rather than a protocol-relative one. -->
  <link href="https://fonts.googleapis.com/css?family=Source+Code+Pro" rel="stylesheet">
  <link rel="stylesheet" href="/css/style.css">
  <meta name="generator" content="Hexo 4.2.0">
</head>

<body>
  <div id="container">
    <div id="wrap">
      <header id="header">
        <!-- Site header: banner, logo, primary navigation, feed/search icons and
             the site-search form. -->
        <div id="banner"></div>
        <div id="header-outer" class="outer">
          <div id="header-title" class="inner">
            <h1 id="logo-wrap">
              <a href="/" id="logo">Hexo</a>
            </h1>
          </div>
          <div id="header-inner" class="inner">
            <nav id="main-nav" aria-label="Primary">
              <!-- No href: this anchor is only a click target; its handler is
                   presumably bound in /js/script.js (not visible here). -->
              <a id="main-nav-toggle" class="nav-icon"></a>
              <a class="main-nav-link" href="/">Home</a>
              <a class="main-nav-link" href="/archives">Archives</a>
            </nav>
            <nav id="sub-nav" aria-label="Feed and search">
              <!-- Icon-only link: aria-label gives it an accessible name, since title alone is unreliable. -->
              <a id="nav-rss-link" class="nav-icon" href="/atom.xml" title="RSS Feed" aria-label="RSS Feed"></a>
              <!-- No href: presumably toggles the search form via script; confirm in /js/script.js. -->
              <a id="nav-search-btn" class="nav-icon" title="Search"></a>
            </nav>
            <div id="search-form-wrap">
              <!-- Submits the query to Google, restricted to this site through the hidden
                   "sitesearch" field. The input and the icon-only submit button carry
                   aria-labels because neither has a visible text label. -->
              <form action="https://google.com/search" method="get" accept-charset="UTF-8" class="search-form"><input type="search" name="q" class="search-form-input" placeholder="Search" aria-label="Search"><button type="submit" class="search-form-submit" aria-label="Search">&#xF002;</button><input type="hidden" name="sitesearch" value="http://yoursite.com"></form>
            </div>
          </div>
        </div>
      </header>
      <div class="outer">
        <section id="main"><article id="post-热点抓取封装函数" class="article article-type-post" itemscope itemprop="blogPost">
  <div class="article-meta">
    <!-- Permalink to this post, labelled with its publication date. -->
    <a href="/2020/04/25/%E7%83%AD%E7%82%B9%E6%8A%93%E5%8F%96%E5%B0%81%E8%A3%85%E5%87%BD%E6%95%B0/" class="article-date">
      <time datetime="2020-04-25T03:03:44.000Z" itemprop="datePublished">2020-04-25</time>
    </a>
  </div>
  <div class="article-inner">
    
    
      <header class="article-header">
        <!-- Post title, marked as the microdata "name" property of the enclosing article item. -->
        <h1 class="article-title" itemprop="name">
          &#39;热点抓取封装函数&#39;
        </h1>
      </header>
    
    <div class="article-entry" itemprop="articleBody">
      <!-- The post body is a Python script. It was previously rendered as ordinary
           Markdown, which turned its "# ..." comment lines into <h1> headings,
           replaced straight quotes with curly quotes, flattened the line structure
           into <br> runs, and auto-linked the URL with a stray trailing quote in
           the href. It is now a single preformatted listing so that indentation,
           quotes and comments appear as written. The script text is reproduced
           as-is; its logic has not been corrected.
           NOTE(review): the exact spacing after "#" on the two commented-out
           print/temp lines could not be recovered from the rendered page. Confirm
           against the post's Markdown source, and fence the code there so a
           regenerated page stays correct. -->
      <pre><code>import requests
import re
def get_html(url):
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.113 Safari/537.36"
    }
    url = "http://top.baidu.com/buzz?b=341&amp;c=513&amp;fr=topbuzz_b1"
    response = requests.get(url, headers)
def ex_html(html):
    html = response.content.decode("gb2312")

#print(html)

    pattern = '&lt;td\sclass="keyword"&gt;\s+&lt;a.*?&gt;(.*?)&lt;/a&gt;'
def parser(result):
    result = re.findall(pattern, html)
    print(result)
#len(result)
#temp = 1
#for i in result:
#    print(f"{temp}、{i}")
#    temp+=1
def run():
    for index,msg in enumerate(result,1):
        print(f"{index}、{msg}")</code></pre>
    </div>
    <footer class="article-footer">
      <!-- Share trigger: carries the post URL and id in data attributes. It has no
           href, so any click behaviour must come from script (presumably
           /js/script.js, which is not visible here). -->
      <a data-url="http://yoursite.com/2020/04/25/%E7%83%AD%E7%82%B9%E6%8A%93%E5%8F%96%E5%B0%81%E8%A3%85%E5%87%BD%E6%95%B0/" data-id="ck9frj1k90008hgv1603i4u3x" class="article-share-link">Share</a>
      
      
    </footer>
  </div>
  
    
<nav id="article-nav" aria-label="Adjacent posts">
  <!-- Links to the chronologically newer and older posts. The aria-label
       distinguishes this landmark from the other nav elements on the page. -->
  <a href="/2020/04/25/%E7%88%AC%E5%8F%96%E5%BF%85%E5%BA%94%E5%A3%81%E7%BA%B8/" id="article-nav-newer" class="article-nav-link-wrap">
    <strong class="article-nav-caption">Newer</strong>
    <div class="article-nav-title">
      &#39;爬取必应壁纸&#39;
    </div>
  </a>
  <a href="/2020/04/25/%E8%B6%85%E7%9C%81%E4%BB%BD%E9%93%BE%E6%8E%A5/" id="article-nav-older" class="article-nav-link-wrap">
    <strong class="article-nav-caption">Older</strong>
    <div class="article-nav-title">&#39;超省份链接&#39;</div>
  </a>
</nav>

  
</article>

</section>
        
          <aside id="sidebar">
            <!-- Sidebar widgets: the monthly archive index, then the most recent posts. -->
            <div class="widget-wrap">
              <h3 class="widget-title">Archives</h3>
              <div class="widget">
                <ul class="archive-list"><li class="archive-list-item"><a class="archive-list-link" href="/archives/2020/04/">April 2020</a></li><li class="archive-list-item"><a class="archive-list-link" href="/archives/2020/02/">February 2020</a></li></ul>
              </div>
            </div>
            <div class="widget-wrap">
              <h3 class="widget-title">Recent Posts</h3>
              <div class="widget">
                <ul>
                  <li>
                    <a href="/2020/04/25/%E7%AC%94%E8%AE%B0/">&#39;笔记&#39;</a>
                  </li>
                  <li>
                    <a href="/2020/04/25/%E6%8F%90%E5%8F%96IP%E4%BF%A1%E6%81%AF/">&#39;提取IP信息&#39;</a>
                  </li>
                  <li>
                    <a href="/2020/04/25/%E5%BF%85%E5%BA%94%E7%BF%BB%E8%AF%91/">&#39;必应翻译&#39;</a>
                  </li>
                  <li>
                    <a href="/2020/04/25/B%E7%AB%99%E8%AF%84%E8%AE%BA/">&#39;B站评论&#39;</a>
                  </li>
                  <li>
                    <a href="/2020/04/25/B%E7%AB%99%E8%AF%84%E8%AE%BA-ruquests%E2%80%99/">&#39;B站评论_ruquests’</a>
                  </li>
                </ul>
              </div>
            </div>
          </aside>
        
      </div>
      <footer id="footer">
        <!-- Site footer: copyright line and generator credit. -->
        <div class="outer">
          <div id="footer-info" class="inner">
            &copy; 2020 John Doe<br>
            <!-- rel="noopener" stops the newly opened tab from reaching this page through
                 window.opener, matching the external link in the article body. -->
            Powered by <a href="https://hexo.io/" target="_blank" rel="noopener">Hexo</a>
          </div>
        </div>
      </footer>
    </div>
    <nav id="mobile-nav" aria-label="Mobile">
      <!-- Duplicate of the primary links for the small-screen menu. The aria-label
           lets assistive technology tell it apart from the header navigation. -->
      <a href="/" class="mobile-nav-link">Home</a>
      <a href="/archives" class="mobile-nav-link">Archives</a>
    </nav>
    

<!-- Script and style assets loaded at the end of the body, in dependency order:
     jQuery first, then the fancybox plugin and its stylesheet, then the theme script.
     jQuery is fetched over an explicit https URL rather than a protocol-relative one.
     NOTE(review): jQuery 2.0.3 is an old release; check that the fancybox pack
     still works before upgrading, and consider adding an integrity hash. -->
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.0.3/jquery.min.js"></script>
<link rel="stylesheet" href="/fancybox/jquery.fancybox.css">
<script src="/fancybox/jquery.fancybox.pack.js"></script>
<script src="/js/script.js"></script>




  </div>
</body>
</html>